package pretreatment;

import java.awt.List;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;

import fileReadAndWrite.fileRead;
import fileReadAndWrite.fileWrite;
import splitStringAddSpace.splitSource;

/**
 * Data pre-processing utility: merges the scattered files of a directory into one file.
 *
 * @author 夏宇航 xiayuhang_ecust@qq.com
 * @version created 2017-06-27 15:08:19
 */
public class Merge {

	/**
	 * Merges every file found directly under the {@code train} directory into
	 * {@code mergeAll.txt}.
	 *
	 * @param args unused
	 * @throws IOException if the input directory cannot be listed or a file cannot be read/written
	 */
	public static void main(String[] args) throws IOException {
		// Earlier invocations, kept for reference (paths are reproduced verbatim):
//		merge("unlabeled-1/病史特点", "unlabeled-1/merge/bstd1.txt");
//		merge("unlabeled-1/出院情况", "unlabeled-1/merge/cyqk1.txt");
//		merge("unlabeled-1/一般项目", "unlabeled-1/merge/ybxm1.txt");
//		merge("unlabeled-1/诊疗经过", "unlabeled-1/merge/zljg1.txt");
//		
//		merge("unlabeled-2/病史特点", "unlabeled-1/merge/bstd2.txt");
//		merge("unlabeled-2/出院情况", "unlabeled-1/merge/cyqk2.txt");
//		merge("unlabeled-2/一般项目", "unlabeled-1/merge/ybxm2.txt");
//		merge("unlabeled-2/诊疗经过", "unlabeled-1/merge/zljg2.txt");
		
//		merge("unlabeled-1/merge", "unlabeled-1/merge/merge.txt");
		
//		merge("CCKS 2017 Task2/test dataset/01-一般项目-format2", "CCKS 2017 Task2/test dataset/merge/ybxm.txt");
//		merge("CCKS 2017 Task2/test dataset/02-病史特点-format2", "CCKS 2017 Task2/test dataset/merge/bstd.txt");
//		merge("CCKS 2017 Task2/test dataset/04-诊疗经过-format2", "CCKS 2017 Task2/test dataset/merge/zljg.txt");
//		merge("CCKS 2017 Task2/test dataset/05-出院情况-format2", "CCKS 2017 Task2/test dataset/merge/cyqk.txt");
//		merge("CCKS 2017 Task2/test dataset/merge", "CCKS 2017 Task2/test dataset/merge/merge.txt");
		
//		splitSource.splitNoSpace("CCKS 2017 Task2/test dataset/merge/zljg.txt", "CCKS 2017 Task2/test dataset/merge/zljgSplit.txt", "CCKS 2017 Task2/test dataset/merge/zljgPosition.txt");
//		splitSource.splitWithSpace("CCKS 2017 Task2/test dataset/merge/zljgSplit.txt", "CCKS 2017 Task2/test dataset/merge/zljgSplitWithSpace.txt");
	
//		merge("CCKS 2017 Task2/test dataset/merge/split", "CCKS 2017 Task2/test dataset/merge/splitMerge.txt");
		
//		merge("output02/result", "output02/result.txt");
//		merge("output/result", "output/result.txt");
		
		merge("train", "mergeAll.txt");
	}

	/**
	 * Merges all regular files located directly under {@code input} and stores the
	 * result in the file {@code output}.
	 *
	 * <p>Files are processed in sorted pathname order so the result is reproducible.
	 * For each file, lines are copied until the end of the file or the first empty
	 * line, whichever comes first. Every copied line is written followed by
	 * {@code "\n"}. Sub-directories of {@code input} are skipped.
	 *
	 * @param input  path of the directory whose files are merged
	 * @param output path of the file that receives the merged lines
	 * @throws IOException if {@code input} cannot be listed (for example it does not
	 *                     exist or is not a directory), or if reading or writing fails
	 */
	public static void merge(String input,String output) throws IOException{
		File dir=new File(input);
		// listFiles() returns null rather than throwing when the path is not a listable directory.
		File[] files=dir.listFiles();
		if(files==null){
			throw new IOException("Cannot list files of input directory: "+dir.getAbsolutePath());
		}
		// listFiles() gives no ordering guarantee; sort for a deterministic merge order.
		Arrays.sort(files);

		// All lines are collected before the output is opened: the output file may
		// live inside the input directory, so it must not be created while reading.
		ArrayList<String> list=new ArrayList<>();
		for(File file:files){
			if(!file.isFile()){
				continue; // only regular files are merged
			}
			// try-with-resources closes the reader even if readLine() throws.
			try(BufferedReader bReader=new fileRead().readFile(file.getAbsolutePath())){
				String aline=bReader.readLine();
				while(aline!=null&&!aline.equals("")){
//					aline = aline.replace("\t", "￥").replace(" ", "￥");// enable when training word vectors on the test set
					list.add(aline);
					aline=bReader.readLine();
				}
			}
		}

		// try-with-resources flushes and closes the writer even if write() throws.
		try(BufferedWriter bWriter=new fileWrite().writeFile(output)){
			for(String a:list){
				bWriter.write(a+"\n");
			}
		}
	}
}
